
/*
Output: Figure 3. Observed and Predicted Percent Area Harvested
		Figure SI2. Estimated Coefficients from an Autoregressive Model of Percent Area Harvested
*/


run "/Users/holyfantastic/Dropbox/Forest/Paper/Nature/FinalDocuments/Code/000declare_path" // change to your local path


**# Housekeeping and clean data 

set scheme white_jet

use "ParcelLevelDataSept282023_VerNov82024.dta" , clear
// drop if group  == 2 // drop "In Process"
* demean ln_price
gen ln_pricec = ln(price)
qui: sum ln_price
gen ln_price_dm = ln_price - r(mean)

* clean data
**# Figure SI.2

qui:{
/*
keep group landowner_id owner_name ffcpLat ffcpLong iperson ilocal iregion dis_road county fips forestAcres forestAcresTotal /*
*/ forestAcresEngaged forestAcresTotalEngaged ndvi_landtr_mean_forest ienrolled year
*/

la var forestAcres "Forested acres of parcel"
la var forestAcresTotal "Total forested acres owned by owner"

*Requiring at least one acre of forest
drop if forestAcres<1
drop if taxidnum == ""

*Focusing on pre enrollment years (pre-2020)
drop if year>2019

*Mean harvesting by year
bysort year: egen pcut_mean=mean(Landtr_Cut_mean_forest)
bysort year: egen cut_acres_total=total(cut_acres)

*clean variables
gen forestAcres2=forestAcres^2
gen distance2=distance^2
gen long_owner = YearsOwned > 10
gen ln_price  = log(price)

*recode ndvi
replace ndvi_landtr_mean_forest = . if ndvi_landtr_mean_forest <=0 
replace ndvi_landtr_mean_forest = ndvi_landtr_mean_forest / 1000

*demean ndvi and ndvi2

foreach var in ndvi_landtr_mean_forest  {
	qui:sum `var'
	replace `var' = `var' - r(mean)
}
gen  ndvi_landtr_mean_forest2 = ndvi_landtr_mean_forest^2
gen  ndvi_landtr_mean_forest3 = ndvi_landtr_mean_forest^3

// gen  ndvi_landtr_mean_forest2_dm = ndvi_landtr_mean_forest^2

*Let's do everything with the percent harvested, not the share harvested
replace Landtr_Cut_mean_forest = Landtr_Cut_mean_forest * 100

**# regression analysis

**## look at key variables

tab group
egen taxidnum_id  = group(taxidnum)
cap egen county_id = group(county)
xtset  taxidnum_id year
sum Landtr_Cut_mean_forest

}


{
global controls  i.forestAcresGroup i.roadGroup  ///
				i.yearGroup i.iPerson  ///
				i.iLocal

global y Landtr_Cut_mean_forest
global x ndvi_landtr_mean_forest

cap macro drop lag_cut_list
cap macro drop lag_ndvi_list

global max_lag = 15
forvalues i = 1(1)$max_lag{
	
	gen l`i'_x = l`i'.$x
	gen l`i'_y = l`i'.$y
	global lag_cut_list $lag_cut_list l`i'_y
	global lag_ndvi_list $lag_ndvi_list l`i'_x
	
}

/*Nov 4 update*/
macro drop lag_cut_list25
macro drop lag_ndvi_list25

forvalues i = 1(1)25{
	
	cap gen l`i'_x = l`i'.$x
	cap gen l`i'_y = l`i'.$y
	global lag_cut_list25 $lag_cut_list25 l`i'_y
	global lag_ndvi_list25 $lag_ndvi_list25 l`i'_x
	
}
macro drop lag_cut_list5
macro drop lag_ndvi_list5

forvalues i = 1(1)5{
	
	cap gen l`i'_x = l`i'.$x
	cap gen l`i'_y = l`i'.$y
	global lag_cut_list5 $lag_cut_list5 l`i'_y
	global lag_ndvi_list5 $lag_ndvi_list5 l`i'_x
	
}


dis "$lag_ndvi_list"

}

save "$temp/temp_clean_data.dta" , replace

**## Implement once and Plot 
use "$temp/temp_clean_data.dta" ,clear
cap rm "$table/table_autoregressive_coe.xls"

preserve
	reghdfe $y $lag_cut_list5 ln_price_dm , a(taxidnum_id) vce(cl taxidnum_id) 
	outreg2 using "$table/table_autoregressive_coe.xls" , append
	matrix A=J(30,4,0)
	forvalues i=1(1)5{ 
	   scalar a`i'=_b[l`i'_y] 
	   scalar b`i'=(_b[l`i'_y]+1.96*_se[l`i'_y])
	   scalar c`i'=(_b[l`i'_y]-1.96*_se[l`i'_y])
		mat A[`i',1]=`i'
		mat A[`i',2]=a`i'
		mat A[`i',3]=b`i'
		mat A[`i',4]=c`i'
	}

	clear
	svmat A
	gen year = A1
	drop if year == 0
	save "$temp/temp_A" , replace
restore	

preserve

	reghdfe $y $lag_cut_list ln_price_dm , a(taxidnum_id) vce(cl taxidnum_id) 
	outreg2 using "$temp/table_autoregressive_coe.xls" , append
	
	matrix B=J(30,4,0)
	forvalues i=1(1)15{ 
	   scalar a`i'=_b[l`i'_y] 
	   scalar b`i'=(_b[l`i'_y]+1.96*_se[l`i'_y])
	   scalar c`i'=(_b[l`i'_y]-1.96*_se[l`i'_y])
		mat B[`i',1]=`i'
		mat B[`i',2]=a`i'
		mat B[`i',3]=b`i'
		mat B[`i',4]=c`i'
	}
	clear
	svmat B
	gen year = B1
	drop if year == 0
	replace B1 = B1 + 0.2
	save "$temp/temp_B" , replace
	
restore	

preserve

	reghdfe $y $lag_cut_list25 ln_price_dm , a(taxidnum_id) vce(cl taxidnum_id) 
	outreg2 using "$table/table_autoregressive_coe.xls" , append
	
	matrix C=J(30,4,0)
	forvalues i=1(1)25{ 
	   scalar a`i'=_b[l`i'_y] 
	   scalar b`i'=(_b[l`i'_y]+1.96*_se[l`i'_y])
	   scalar c`i'=(_b[l`i'_y]-1.96*_se[l`i'_y])
		mat C[`i',1]=`i'
		mat C[`i',2]=a`i'
		mat C[`i',3]=b`i'
		mat C[`i',4]=c`i'
	}
	clear
	svmat C
	gen year = C1
	drop if year == 0
	replace C1 = C1 + 0.2
	save "$temp/temp_C" , replace
	
restore	

use "$temp/temp_A" , clear
merge 1:1 year using "$temp/temp_B", nogen
merge 1:1 year using "$temp/temp_C", nogen

twoway  /// 
(scatter A2 A1,  mcolor(red%50) msize(vsmall) connect(l) lcolor(red%50) lpattern(dot) lw( medthin  ))  ///
(rcap A3 A4 A1, lcolor(red%50) lpattern(solid) lw(  medium  )) ///
 (scatter B2 B1, ms(d) mcolor(gs4) msize(vsmall) connect(l) lcolor(gs4) lpattern(dot) lw( medthin  ))  ///
(rcap B3 B4 B1, lcolor(gs4) lpattern(solid) lw(  medium  )) ///
(scatter C2 C1, ms(t) mcolor(blue%50) msize(vsmall) connect(l) lcolor(blue%50) lpattern(dot) lw( medthin  ))  ///
(rcap C3 C4 C1, lcolor(blue%50) lpattern(solid) lw(  medium  )) ///
 , yline(0, lcolor(gs10)) ///  
  ytitle("Percent Harvested") xlabel( , nogrid) ylabel( , nogrid)  ///
  legend(on order(2 "Model with 5 lags" 4 "Model with 15 lags" 6 "Model with 25 lags") ring(0) pos(5) rows(1)) ///
   xtitle(" Lagged year ")

   foreach fig_path in figure  {
	
	 dis  "$`fig_path'/fig_autoregressive_coe_R1"
		
	graph export "$`fig_path'/fig_autoregressive_coe_R1.png", as(png) name("Graph") replace
	graph export "$`fig_path'/fig_autoregressive_coe_R1.pdf", as(pdf) name("Graph") replace

}
  
 
 


**## Figure 3
use "$temp/temp_clean_data.dta" , clear
	
{
	
	**## Estimation stage
	
	cap drop y_res fe
	reghdfe $y $lag_cut_list25 ln_price_dm , a(fe= taxidnum_id) vce(cl taxidnum_id) res(y_res)
	
	forvalues i = 1(1)25{
		label var l`i'_y "`i' period lag of outcomes"
	}

	
	**### export auto regression tabel in Appendix
	global b = _b[_cons]
	gen predict_harvested_base = 0 + y_res + $b 
	gen predict_harvested_hat = $y - y_res - fe 
	gen predict_fe = fe
	
	**## Predicting stage: Year by year 
	* real data
	cap drop temp*
	forvalues i = 1985(1)2019{
		
// 		bys taxidnum_id: gen temp = Landtr_Cut_mean_forest if year == `i'
		bys taxidnum_id: gen temp = predict_harvested_hat if year == `i'
		bys taxidnum_id: egen harvested_data_`i' = mean($y) if year == `i'
		gen harvested_`i' = harvested_data_`i'

		cap drop temp*

	}
	
	* in sample prediction data
	cap drop temp*
	forvalues i = 2010(1)2019{
		
		bys taxidnum_id: gen temp = predict_harvested_hat if year == `i'
		bys taxidnum_id: egen harvested_insample_perdict_`i' = mean(temp)

		cap drop temp*

	}
	
	**## Comparison stage
	collapse (mean)  harvested_* predict_harvested_base predict_harvested_hat predict_fe  (max) roadGroup iLocal iPerson yearGroup forestAcresGroup [aw = forestAcres] , by(taxidnum_id ienrolled group )


	
	* predicted data

	forvalues yr = 2020(1)2039{

// 		dis as text "predicting year " `yr'
		local predict_yr = `yr'
		local k = 25
		local ub = `predict_yr' - 1
		local lb = `predict_yr' - `k'
		cap drop harvested_`predict_yr'
		gen harvested_`predict_yr'= predict_harvested_base

		forvalues i = `lb'(1)`ub'{
			dis `i' `k'
			replace harvested_`predict_yr' = harvested_`predict_yr' + _b[l`k'_y] *  harvested_`i' 
			local k = `k' - 1
			
		}

	}
	
	
/*Maybe */
preserve

	collapse (mean) harvested_* , by(group)
	reshape long harvested_ harvested_insample_perdict_ sd_harvested_ , i(group) j(year)
	gen bs_id = 0
	save "$temp/predict_output" , replace
	
	
		tw || /// 
	line harvested_ year if year<= 2019 & group ==0, lc(red) lp(solid) || ///
	line harvested_ year if year<= 2019 & group ==3, lc(blue) lp(solid)  || ///
	line harvested_ year if year>= 2019 & group ==0, lc(red) lp(dash)  || ///
	line harvested_ year if year>= 2019 & group ==3, lc(blue) lp(dash)  || ///
	, legend(on order(1 "Unengaged, data" 2 "Enrolled, data" 3 "Unengaged, predict" 4 "Enrolled, predict" ) rows(4) ring(0) pos(1)) ytitle("Average annual percent forested area cut") xtitle("Year")
	
	foreach fig_path in figure  {
	
restore

}

**## Start Bootstrap

capture program drop prediction_yoy
program define prediction_yoy,rclass
version 18.0

preserve

	
	**## Estimation stage
// reghdfe $x $lag_cut_list  , a(taxidnum_id) vce(cl taxidnum_id)

	cap drop y_res fe
	/*Nov 7*/
	reghdfe $y $lag_cut_list25  ln_price_dm , a(fe= taxidnum_id) vce(cl taxidnum_id) res(y_res)
	// predict y_predict, xbd
	// predict temp, xb
	// gen temp2 = temp + fe + y_res
	// br temp2 $y
	// gen predict_harvested_base = 0 + y_res + _b[_cons]
	gen predict_harvested_base = 0 + y_res + $b 
	gen predict_harvested_hat = $y - y_res - fe 
	gen predict_fe = fe
	global b = _b[_cons]
	// reg fe i.group

// 	keep if county != "Bedford" &  year >= YearBought // current owner
	**## Predicting stage: Year by year 
	* real data
	cap drop temp*
	forvalues i = 2000(1)2019{
		
// 		bys taxidnum_id: gen temp = Landtr_Cut_mean_forest if year == `i'
		bys taxidnum_id: gen temp = predict_harvested_hat if year == `i'
// 		bys taxidnum_id: egen harvested_`i' = mean(temp)
		bys taxidnum_id: egen harvested_`i' = mean($y) if year == `i'
		cap drop temp*

	}
	* connecting year: 2019
// 	bys taxidnum_id: gen temp = Landtr_Cut_mean_forest if year == 2019
	bys taxidnum_id: gen temp = predict_harvested_hat if year == 2019
// 	bys taxidnum_id: egen harvested_data_2019 = mean($y) if year == 2019
	bys taxidnum_id: egen harvested_insample_perdict_2019 = mean(temp)
	cap drop temp*
	

	**## Predicting stage
	collapse (mean) harvested_*   predict_harvested_base predict_harvested_hat predict_fe (max) roadGroup iLocal iPerson yearGroup forestAcresGroup , by(taxidnum_id ienrolled group )

	* predicted data
	forvalues yr = 2020(1)2039{

// 		dis as text "predicting year " `yr'
		local predict_yr = `yr'
		local k = 15
		local ub = `predict_yr' - 1
		local lb = `predict_yr' - `k'
		cap drop harvested_`predict_yr'
		gen harvested_`predict_yr'= predict_harvested_base

		forvalues i = `lb'(1)`ub'{
			dis `i' `k'
			replace harvested_`predict_yr' = harvested_`predict_yr' + _b[l`k'_y] *  harvested_`i' 
			local k = `k' - 1
			
		}

	}

	**## Comparison stage
	
	collapse (mean) harvested_*  , by(group)
	reshape long harvested_ sd_harvested_ , i(group) j(year)
	
	save "$temp/temp_bs_predict_output" , replace
	
// 	collapse (mean) harvested_* , by(ienrolled)
// 	reshape long harvested_ , i(ienrolled) j(year)
//	
// 	reg harvested_ ienrolled if year> 2019 & year <= 2030
// 	return scalar myb_20_30 = _b[ienrolled]
//	
// 	reg harvested_ ienrolled if year> 2019 
// 	return scalar myb_20_39 = _b[ienrolled]
//	


restore

end

set seed 1234

use "$temp/temp_clean_data.dta" , clear
preserve
	duplicates drop year ,  force
	keep year
	save "$temp/year_base", replace
restore 	
duplicates drop taxidnum ,  force
keep taxidnum taxidnum_id	
count 
global N = r(N)

dis as text "---------------"
dis as text "Start Bootstrap"

	**### bsample
	global N_bs = 250
	local i = 1 
	while `i' <= $N_bs {
		
		qui:{
			
			use "$temp/temp_clean_data.dta" , clear
			duplicates drop taxidnum ,  force
			keep taxidnum taxidnum_id
			
			bsample $N
			
			cross using "$temp/year_base"
			merge m:1 taxidnum_id year using "$temp/temp_clean_data.dta" , keep(3) nogen
			save "$temp/temp_bs_sample.dta" , replace
		/*bs draw are tricky here because we need balanced panel data so the draw have to be at panel id level */
		// simulate beta1=r(myb_20_30) beta2=r(myb_20_39), reps(100) dots  saving(regdat,replace) : prediction_yoy
		//
		// bootstrap , nowarn reps(50) seed(1234) dots: prediction_yoy

			qui: prediction_yoy
			use  "$temp/temp_bs_predict_output" , clear
			gen bs_id = `i'
			save "$temp/temp_bs_predict_output" , replace
			
			use "$temp/predict_output" , clear
			append using  "$temp/temp_bs_predict_output" 
			save "$temp/predict_output" , replace
			
		}
		
		local i = `i' + 1
		di "." _cont
		
	
	}

dis as text "Done!"
dis as text "---------------"

sort group year bs_id

save "$temp/temp.dta", replace



	**### find SE and plot
	use "$temp/temp.dta",  clear
	gen harvested_2 = harvested_
	gen harvested_0  = harvested_ if  bs_id == 0
	collapse (max) harvested_0 (mean) harvested_mean = harvested_ (p97) harvested_ub = harvested_  (p3) harvested_lb = harvested_  , by(group year)

	keep if year >= 2000
	set scheme white_jet
	tw || /// 
	rarea harvested_ub harvested_lb year  if group ==0 , color(red%15) lwidth(none) || ///
	rarea harvested_ub harvested_lb year  if group ==3 , color(blue%15) lwidth(none) || ///
	line harvested_mean year if year<= 2019 & group ==0, lc(red) lp(solid) || ///
	line harvested_mean year if year<= 2019 & group ==3, lc(blue) lp(solid)  || ///
	line harvested_mean year if year>= 2019 & group ==0, lc(red) lp(dash)  || ///
	line harvested_mean year if year>= 2019 & group ==3, lc(blue) lp(dash)  || ///
	, legend(on order(3 "Unengaged, data" 4 "Enrolled, data" 5 "Unengaged, predict" 6 "Enrolled, predict" ) rows(2) ring(0) pos(1) ) xtitle("Panel A: Average annual percent harvested",size(small)) ///
				 ytitle("Percent Area Harvested")  xlabel(2000(10)2040, nogrid) ylabel(, nogrid) xsca(alt )  fysize(60) graphregion(margin(small)) saving(habove, replace)


	**### t-test plot overtime
	use "$temp/temp.dta",  clear
	keep if group == 0 | group == 3
	keep if year >= 2020
	keep group year bs_id harvested_
	reshape wide harvested_ ,i(year bs_id) j(group)
	gen diff_harvested = harvested_3 - harvested_0 
	
	preserve
		collapse (mean) mean_diff_harvested = diff_harvested  , by( year )
		egen sum_mean_diff_harvested = sum(mean_diff_harvested)
		egen average_mean_diff_harvested = mean(mean_diff_harvested)

	restore
	
	collapse  (mean) mean_diff_harvested = diff_harvested (p97) mean_diff_harvested_ub = diff_harvested  (p3) mean_diff_harvested_lb = diff_harvested  , by( year)
	
twoway (bar mean_diff_harvested  year, color(gs10)  lcolor(gs10) )  ///
(rcap mean_diff_harvested_lb mean_diff_harvested_ub year, lcolor(gs10) lpattern(solid) ) ///
 ,  yline(0, lcolor(gs10)) legend(off) ///
				 ytitle("Percent Area Harvested")  ///  
 subtitle("Panel B: Difference in predicted harvesting: Enrolled - Unengaged",size(small)) xlabel(2000(10)2040, nogrid) xtitle(Year) ///
	ylabel(, nogrid) fysize(40) graphregion(margin(vsmall))  ///
 yline(0) legend(off) title("",size(medium))   saving(hbelow, replace)


 			graph combine  habove.gph hbelow.gph  ///
		,    cols(1)      imargin(0 0 0 0) ///
		graphregion(margin(medium))  xsize(4)  ysize(3) 
 
 
foreach fig_path in figure  {
	
	 dis  "$`fig_path'/fig3_prediction_R1"
		
	graph export "$`fig_path'/fig3_prediction_R1.png", as(png) name("Graph") replace
	graph export "$`fig_path'/fig3_prediction_R1.pdf", as(pdf) name("Graph") replace

}

 
 